import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import calendar
from IPython.display import HTML
import plotly.express as px
# Load the streaming-catalogue CSV into `data` and display it.
# NOTE(review): the path is absolute and specific to one machine.
csv_path = "C:\\Users\\TECH\\Desktop\\Data Science\\Movies Streams project\\moviestreams.csv"
data = pd.read_csv(csv_path)
data
| Unnamed: 0 | ID | Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | Type | Directors | Genres | Country | Language | Runtime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | Inception | 2010 | 13+ | 8.8 | 87% | 1 | 0 | 0 | 0 | 0 | Christopher Nolan | Action,Adventure,Sci-Fi,Thriller | United States,United Kingdom | English,Japanese,French | 148.0 |
| 1 | 1 | 2 | The Matrix | 1999 | 18+ | 8.7 | 87% | 1 | 0 | 0 | 0 | 0 | Lana Wachowski,Lilly Wachowski | Action,Sci-Fi | United States | English | 136.0 |
| 2 | 2 | 3 | Avengers: Infinity War | 2018 | 13+ | 8.5 | 84% | 1 | 0 | 0 | 0 | 0 | Anthony Russo,Joe Russo | Action,Adventure,Sci-Fi | United States | English | 149.0 |
| 3 | 3 | 4 | Back to the Future | 1985 | 7+ | 8.5 | 96% | 1 | 0 | 0 | 0 | 0 | Robert Zemeckis | Adventure,Comedy,Sci-Fi | United States | English | 116.0 |
| 4 | 4 | 5 | The Good, the Bad and the Ugly | 1966 | 18+ | 8.8 | 97% | 1 | 0 | 1 | 0 | 0 | Sergio Leone | Western | Italy,Spain,West Germany | Italian | 161.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16739 | 16739 | 16740 | The Ghosts of Buxley Hall | 1980 | NaN | 6.2 | NaN | 0 | 0 | 0 | 1 | 0 | Bruce Bilson | Comedy,Family,Fantasy,Horror | United States | English | 120.0 |
| 16740 | 16740 | 16741 | The Poof Point | 2001 | 7+ | 4.7 | NaN | 0 | 0 | 0 | 1 | 0 | Neal Israel | Comedy,Family,Sci-Fi | United States | English | 90.0 |
| 16741 | 16741 | 16742 | Sharks of Lost Island | 2013 | NaN | 5.7 | NaN | 0 | 0 | 0 | 1 | 0 | Neil Gelinas | Documentary | United States | English | NaN |
| 16742 | 16742 | 16743 | Man Among Cheetahs | 2017 | NaN | 6.6 | NaN | 0 | 0 | 0 | 1 | 0 | Richard Slater-Jones | Documentary | United States | English | NaN |
| 16743 | 16743 | 16744 | In Beaver Valley | 1950 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 0 | James Algar | Documentary,Short,Family | United States | English | 32.0 |
16744 rows × 17 columns
# Wrap the loaded table in a DataFrame and keep a list of its column names.
df = pd.DataFrame(data)
cols = list(df.columns)
cols
['Unnamed: 0', 'ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type', 'Directors', 'Genres', 'Country', 'Language', 'Runtime']
# Remove the 'Unnamed: 0' and 'ID' columns, then display the result.
data = data.drop(columns=['Unnamed: 0', 'ID'])
data
| Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | Type | Directors | Genres | Country | Language | Runtime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Inception | 2010 | 13+ | 8.8 | 87% | 1 | 0 | 0 | 0 | 0 | Christopher Nolan | Action,Adventure,Sci-Fi,Thriller | United States,United Kingdom | English,Japanese,French | 148.0 |
| 1 | The Matrix | 1999 | 18+ | 8.7 | 87% | 1 | 0 | 0 | 0 | 0 | Lana Wachowski,Lilly Wachowski | Action,Sci-Fi | United States | English | 136.0 |
| 2 | Avengers: Infinity War | 2018 | 13+ | 8.5 | 84% | 1 | 0 | 0 | 0 | 0 | Anthony Russo,Joe Russo | Action,Adventure,Sci-Fi | United States | English | 149.0 |
| 3 | Back to the Future | 1985 | 7+ | 8.5 | 96% | 1 | 0 | 0 | 0 | 0 | Robert Zemeckis | Adventure,Comedy,Sci-Fi | United States | English | 116.0 |
| 4 | The Good, the Bad and the Ugly | 1966 | 18+ | 8.8 | 97% | 1 | 0 | 1 | 0 | 0 | Sergio Leone | Western | Italy,Spain,West Germany | Italian | 161.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16739 | The Ghosts of Buxley Hall | 1980 | NaN | 6.2 | NaN | 0 | 0 | 0 | 1 | 0 | Bruce Bilson | Comedy,Family,Fantasy,Horror | United States | English | 120.0 |
| 16740 | The Poof Point | 2001 | 7+ | 4.7 | NaN | 0 | 0 | 0 | 1 | 0 | Neal Israel | Comedy,Family,Sci-Fi | United States | English | 90.0 |
| 16741 | Sharks of Lost Island | 2013 | NaN | 5.7 | NaN | 0 | 0 | 0 | 1 | 0 | Neil Gelinas | Documentary | United States | English | NaN |
| 16742 | Man Among Cheetahs | 2017 | NaN | 6.6 | NaN | 0 | 0 | 0 | 1 | 0 | Richard Slater-Jones | Documentary | United States | English | NaN |
| 16743 | In Beaver Valley | 1950 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 0 | James Algar | Documentary,Short,Family | United States | English | 32.0 |
16744 rows × 15 columns
# NOTE: `cols` was captured before 'Unnamed: 0' and 'ID' were dropped from
# `data`, so this list still contains the two removed columns.
cols
['Unnamed: 0', 'ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type', 'Directors', 'Genres', 'Country', 'Language', 'Runtime']
# Count the missing values in every column.
data.isna().sum()
Title 0 Year 0 Age 9390 IMDb 571 Rotten Tomatoes 11586 Netflix 0 Hulu 0 Prime Video 0 Disney+ 0 Type 0 Directors 726 Genres 275 Country 435 Language 599 Runtime 592 dtype: int64
# Inspect the raw age-rating labels (strings such as '13+'; NaN where missing).
data['Age']
0 13+
1 18+
2 13+
3 7+
4 18+
...
16739 NaN
16740 7+
16741 NaN
16742 NaN
16743 NaN
Name: Age, Length: 16744, dtype: object
# Numeric minimum age for each age-rating label.
# NOTE(review): confirm the dataset spells the all-ages label exactly 'ALL';
# any label without an entry here ends up as NaN in 'Age Copy'.
ageMap = {
    '13+': 13,
    '18+': 18,
    '7+': 7,
    '16+': 16,
    'ALL': 0,
}

# Store the numeric version next to the original labels and show the table.
age_labels = data['Age']
data['Age Copy'] = age_labels.map(ageMap)
data
| Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | Type | Directors | Genres | Country | Language | Runtime | Age Copy | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Inception | 2010 | 13+ | 8.8 | 87% | 1 | 0 | 0 | 0 | 0 | Christopher Nolan | Action,Adventure,Sci-Fi,Thriller | United States,United Kingdom | English,Japanese,French | 148.0 | 13.0 |
| 1 | The Matrix | 1999 | 18+ | 8.7 | 87% | 1 | 0 | 0 | 0 | 0 | Lana Wachowski,Lilly Wachowski | Action,Sci-Fi | United States | English | 136.0 | 18.0 |
| 2 | Avengers: Infinity War | 2018 | 13+ | 8.5 | 84% | 1 | 0 | 0 | 0 | 0 | Anthony Russo,Joe Russo | Action,Adventure,Sci-Fi | United States | English | 149.0 | 13.0 |
| 3 | Back to the Future | 1985 | 7+ | 8.5 | 96% | 1 | 0 | 0 | 0 | 0 | Robert Zemeckis | Adventure,Comedy,Sci-Fi | United States | English | 116.0 | 7.0 |
| 4 | The Good, the Bad and the Ugly | 1966 | 18+ | 8.8 | 97% | 1 | 0 | 1 | 0 | 0 | Sergio Leone | Western | Italy,Spain,West Germany | Italian | 161.0 | 18.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 16739 | The Ghosts of Buxley Hall | 1980 | NaN | 6.2 | NaN | 0 | 0 | 0 | 1 | 0 | Bruce Bilson | Comedy,Family,Fantasy,Horror | United States | English | 120.0 | NaN |
| 16740 | The Poof Point | 2001 | 7+ | 4.7 | NaN | 0 | 0 | 0 | 1 | 0 | Neal Israel | Comedy,Family,Sci-Fi | United States | English | 90.0 | 7.0 |
| 16741 | Sharks of Lost Island | 2013 | NaN | 5.7 | NaN | 0 | 0 | 0 | 1 | 0 | Neil Gelinas | Documentary | United States | English | NaN | NaN |
| 16742 | Man Among Cheetahs | 2017 | NaN | 6.6 | NaN | 0 | 0 | 0 | 1 | 0 | Richard Slater-Jones | Documentary | United States | English | NaN | NaN |
| 16743 | In Beaver Valley | 1950 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 0 | James Algar | Documentary,Short,Family | United States | English | 32.0 | NaN |
16744 rows × 16 columns
# Show what kind of object a single column is (a pandas Series).
# NOTE(review): if the goal was the element type, `.dtype` is the attribute to check.
type(data['Rotten Tomatoes'])
pandas.core.series.Series
pip install plotly pandas
Requirement already satisfied: plotly in c:\users\tech\anaconda3\lib\site-packages (5.9.0) Requirement already satisfied: pandas in c:\users\tech\anaconda3\lib\site-packages (1.4.4) Requirement already satisfied: tenacity>=6.2.0 in c:\users\tech\anaconda3\lib\site-packages (from plotly) (8.0.1) Requirement already satisfied: pytz>=2020.1 in c:\users\tech\anaconda3\lib\site-packages (from pandas) (2022.1) Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\tech\anaconda3\lib\site-packages (from pandas) (2.8.2) Requirement already satisfied: numpy>=1.18.5 in c:\users\tech\anaconda3\lib\site-packages (from pandas) (1.21.5) Requirement already satisfied: six>=1.5 in c:\users\tech\anaconda3\lib\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0) Note: you may need to restart the kernel to use updated packages.
# Ten most frequent values of the 'Language' column, drawn as a seaborn bar chart.
Language = data['Language'].value_counts().iloc[:10]

plt.figure(figsize=(10, 8))
language_axes = sns.barplot(x=Language.index, y=Language.values)
language_axes.set_title('Top 10 languages in streaming services')
Text(0.5, 1.0, 'Top 10 languages in streaming services')
# It's just like the KayanRH dashboard: the same top-10 language counts,
# rendered with Plotly Express as a pie chart and as a bar chart.
from IPython.display import HTML
import plotly.express as px

languages_title = 'Top 10 languages in streaming services'

# Pie chart: one slice per language value, sized by its count.
figLanguages = px.pie(
    data,
    names=Language.index,
    values=Language.values,
    title=languages_title,
    height=600,
)
# Keep an HTML rendering of the pie chart in `html_fig`.
html_fig = HTML(figLanguages.to_html())
figLanguages.show()

# Bar chart of the same counts.
fig2 = px.bar(
    data,
    x=Language.index,
    y=Language.values,
    title=languages_title,
)
fig2.show()
from IPython.display import HTML
import plotly.express as px

# Frequency of each age rating across the whole catalogue (at most ten labels).
Age = data['Age'].value_counts().head(10)

# Pie chart with one slice per age rating.
figAge = px.pie(
    names=Age.index,
    values=Age.values,
    title='Number of movies in specific age group in all services',
    height=600,
)
# Keep an HTML rendering of the figure in `html_fig`.
html_fig = HTML(figAge.to_html())
# Render the chart.
figAge.show()
# Same pie chart, but fed from a two-column frame ('age', 'count') so the
# chart refers to named columns (the original note: this is to customize the
# hover text of a Plotly pie chart).
Age = data['Age'].value_counts().head(10)
Age_df = Age.rename_axis('age').reset_index(name='count')

figAge = px.pie(
    Age_df,
    names='age',
    values='count',
    title='Number of movies in specific age group in all services',
)
figAge.show()
# ALL SERVICES
from IPython.display import HTML
import plotly.express as px

# Frequency of each age rating across the whole catalogue (at most ten labels).
Age = data['Age'].value_counts().head(10)

# Bar chart: one bar per age rating, labelled with its count.
figAge = px.bar(
    x=Age.index,
    y=Age.values,
    text=Age,
    title='Number of movies in specific age group in all services',
    height=600,
)
# Draw the count labels above the bars.
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
# Render the chart.
figAge.show()
# IN NETFLIX
from IPython.display import HTML
import plotly.express as px

# Titles available on Netflix (flag column equals 1).
Netflix = data[data['Netflix'] == 1]

# Count each age rating once and reuse the result for the x values, the bar
# heights and the bar labels.
netflix_age_counts = Netflix['Age'].value_counts()

# Bar chart of titles per age rating, drawn in red (#E50914).
figAge = px.bar(Netflix['Age'],
                x=netflix_age_counts.index,
                y=netflix_age_counts,
                title='Number of movies in specific age group in NETFLIX',
                height=600,
                text=netflix_age_counts,
                color_discrete_sequence=['#E50914'])
# Label the axes the same way the Prime Video, Disney+ and Hulu charts do;
# previously this chart had no axis titles (the layout call was commented out
# and referred to an undefined `fig`).
figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies")
# Draw the count labels above the bars.
figAge.update_traces(texttemplate='%{text:2s}', textposition='outside')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
# Render the chart.
figAge.show()
# IN Amazon Prime Video
from IPython.display import HTML
import plotly.express as px

# Titles available on Prime Video (flag column equals 1).
PrimeVideo = data[data['Prime Video'] == 1]
prime_age_counts = PrimeVideo['Age'].value_counts()

# Bar chart of titles per age rating, drawn in orange (#FF9900).
figAge = px.bar(
    PrimeVideo['Age'],
    x=prime_age_counts.index,
    y=prime_age_counts,
    text=prime_age_counts,
    title='Number of movies in specific age group in Amazon Prime Video',
    height=600,
    color_discrete_sequence=['#FF9900'],
)
# Axis titles, then count labels above the bars.
figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies")
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
# IN Disney+
from IPython.display import HTML
import plotly.express as px

# Titles available on Disney+ (flag column equals 1).
Disney = data[data['Disney+'] == 1]
disney_age_counts = Disney['Age'].value_counts()

# Bar chart of titles per age rating, drawn in dark blue (#153866).
figAge = px.bar(
    Disney['Age'],
    x=disney_age_counts.index,
    y=disney_age_counts,
    text=disney_age_counts,
    title='Number of movies in specific age group in Disney+',
    height=600,
    color_discrete_sequence=['#153866'],
)
# Axis titles, then count labels above the bars.
figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies")
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
# IN Hulu
from IPython.display import HTML
import plotly.express as px

# Titles available on Hulu (flag column equals 1).
Hulu = data[data['Hulu'] == 1]
hulu_age_counts = Hulu['Age'].value_counts()

# Bar chart of titles per age rating, drawn in green (#66aa33).
figAge = px.bar(
    Hulu['Age'],
    x=hulu_age_counts.index,
    y=hulu_age_counts,
    text=hulu_age_counts,
    title='Number of movies in specific age group in Hulu',
    height=600,
    color_discrete_sequence=['#66aa33'],
)
# Axis titles, then count labels above the bars.
figAge.update_layout(xaxis_title="Age Group", yaxis_title="Number of Movies")
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
from IPython.display import HTML
import plotly.express as px

# Re-derive the Hulu subset exactly as before (it is not used by this chart).
Hulu = data[data['Hulu'] == 1]

# 'New Rotten Tomatoes' is not created anywhere in the visible notebook.
# NOTE(review): presumably it is 'Rotten Tomatoes' with the trailing '%'
# removed (the recorded value_counts output lists bare numbers) — confirm.
# It is only rebuilt here when it is missing.
if 'New Rotten Tomatoes' not in data.columns:
    data['New Rotten Tomatoes'] = data['Rotten Tomatoes'].str.rstrip('%')

# Work on numeric scores so the x axis runs in numeric order rather than text
# order, and take both the scores (index) and their counts (values) from the
# SAME sorted series. Previously x received counts sorted by score and y
# received counts sorted by frequency, so the bars paired unrelated numbers.
rt_numeric = pd.to_numeric(data['New Rotten Tomatoes'], errors='coerce')
rt_counts = rt_numeric.value_counts().sort_index()
rt_distribution = pd.DataFrame({
    'Rotten Tomatoes Score': rt_counts.index,
    'Number of Movies': rt_counts.values,
})

# Bar chart: one bar per score; the axes take their titles from the column
# names (the old title and axis labels wrongly referred to age groups).
figAge = px.bar(rt_distribution,
                x='Rotten Tomatoes Score',
                y='Number of Movies',
                title='Number of movies per Rotten Tomatoes rating',
                height=600,
                color_discrete_sequence=['blue'])
# No `text` is passed to this chart, so the former '%{text:2s}' label template
# had nothing to format and is left out.
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
# Look at the current table, then count how many rows carry each value of the
# 'Hulu' flag (1 = on Hulu, 0 = not).
data
data['Hulu'].value_counts()
0 15841 1 903 Name: Hulu, dtype: int64
# Rating frequencies ordered by rating label. In the recorded output the
# labels sort as '10', '100', '11', ... — i.e. as text, not as numbers.
data['New Rotten Tomatoes'].value_counts().sort_index()
10 20
100 407
11 27
12 11
13 34
...
95 72
96 73
97 61
98 41
99 26
Name: New Rotten Tomatoes, Length: 99, dtype: int64
# The same rating frequencies in value_counts' default order: most common first.
data['New Rotten Tomatoes'].value_counts()
100 407
80 162
50 136
83 131
67 126
...
28 10
7 10
4 9
3 4
2 4
Name: New Rotten Tomatoes, Length: 99, dtype: int64
# For every service, take the size of its most common Rotten Tomatoes rating
# group: value_counts() orders by frequency, so position 0 holds the largest
# count. `.iloc[0]` states that positional lookup explicitly; the former `[0]`
# depended on an integer key falling back to a position on a string-labelled
# Series.
# NOTE(review): these values are title counts, not rating values, although
# the column is called 'Rotten Tomatos Scores' — confirm that is intended.
service_subsets = (Netflix, PrimeVideo, Hulu, Disney)
rt_scores = pd.DataFrame({
    'Streaming services': ['Netflix', 'Prime video', 'Hulu', 'Disney+'],
    'Rotten Tomatos Scores': [
        subset['Rotten Tomatoes'].value_counts().iloc[0]
        for subset in service_subsets
    ],
})
rt_scores
| Streaming services | Rotten Tomatos Scores | |
|---|---|---|
| 0 | Netflix | 130 |
| 1 | Prime video | 257 |
| 2 | Hulu | 18 |
| 3 | Disney+ | 19 |
# Order the services from the largest value to the smallest.
sort_rt_scores = rt_scores.sort_values(
    by='Rotten Tomatos Scores',
    ascending=False,
)
sort_rt_scores
| Streaming services | Rotten Tomatos Scores | |
|---|---|---|
| 1 | Prime video | 257 |
| 0 | Netflix | 130 |
| 3 | Disney+ | 19 |
| 2 | Hulu | 18 |
from IPython.display import HTML
import plotly.express as px

# Re-derive the Hulu subset (not used by the chart below).
Hulu = data[data['Hulu'] == 1]

# Columns of the sorted summary table that feed the chart.
service_names = sort_rt_scores['Streaming services']
service_values = sort_rt_scores['Rotten Tomatos Scores']

# Bar chart: one blue bar per streaming service, labelled with its value.
figAge = px.bar(
    sort_rt_scores,
    x=service_names,
    y=service_values,
    text=service_values,
    title='Rotten tomatos Rating for each services',
    height=600,
    color_discrete_sequence=['blue'],
)
# Draw the value labels above the bars.
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
from IPython.display import HTML
import plotly.express as px

# Rows whose IMDb rating equals exactly 1.
# NOTE(review): `IMDb` is not used by the chart below — confirm it is needed.
IMDb = data[data['IMDb'] == 1]

# Number of titles at each IMDb rating value.
imdb_counts = data['IMDb'].value_counts()

# Bar chart: one yellow (#f3ce13) bar per rating value, labelled with its count.
figAge = px.bar(
    data,
    x=imdb_counts.index,
    y=imdb_counts,
    text=imdb_counts,
    title='IMDb Ratings',
    height=600,
    color_discrete_sequence=['#f3ce13'],
)
# Draw the count labels above the bars.
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
# Ten most common runtimes and the number of titles with each.
# value_counts() already orders by frequency (highest first), and head(10)
# selects strictly by position. The former `[:10]` slice was applied to a
# Series indexed by float runtimes, where an integer slice can be resolved by
# label (everything up to runtime 10.0) instead of by position — the
# previously recorded output had 157 rows and ended at runtime 10.0.
top_runtimes = data['Runtime'].value_counts().head(10)
RuntimeCount = pd.DataFrame({
    'Runtime': top_runtimes.index,
    'Count': top_runtimes.values,
})
RuntimeCount
| Runtime | Count | |
|---|---|---|
| 0 | 90.0 | 971 |
| 1 | 95.0 | 489 |
| 2 | 92.0 | 434 |
| 3 | 93.0 | 422 |
| 4 | 85.0 | 408 |
| ... | ... | ... |
| 152 | 19.0 | 8 |
| 153 | 32.0 | 8 |
| 154 | 9.0 | 8 |
| 155 | 7.0 | 8 |
| 156 | 10.0 | 8 |
157 rows × 2 columns
# Columns of the runtime summary that feed the chart.
runtime_values = RuntimeCount['Runtime']
runtime_counts = RuntimeCount['Count']

# Bar chart: runtime on x, number of titles on y; each bar is labelled with
# its runtime value.
figAge = px.bar(
    RuntimeCount,
    x=runtime_values,
    y=runtime_counts,
    text=runtime_values,
    title='Counts of Runtime of movies',
    height=600,
    color_discrete_sequence=['#f3ce13'],
)
# Draw the labels above the bars.
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
# Number of titles for every distinct runtime, as a ('Runtime', 'Count') table.
runtime_group_sizes = data.groupby('Runtime').size()
RuntimeCount = runtime_group_sizes.reset_index(name='Count')

# Bar chart: runtime on x, number of titles on y; each bar is labelled with
# its count.
figAge = px.bar(
    RuntimeCount,
    x='Runtime',
    y='Count',
    text='Count',
    title='Counts of Runtime of movies',
    height=600,
    color_discrete_sequence=['#f3ce13'],
)
# Draw the count labels above the bars.
figAge.update_traces(textposition='outside', texttemplate='%{text:2s}')
# NOTE(review): this HTML object is neither stored nor shown here; it only
# matters if this line ends a notebook cell.
HTML(figAge.to_html())
figAge.show()
# There is an example from 1.13 until the end.